#!/usr/bin/python3
# -*- coding: UTF-8 -*-
# bing 搜索关键字
# http://library.sysu.edu.cn/print-service
# 编写一个项目，将自己所监听的网页收集起来
import requests
import time
from bs4 import BeautifulSoup

# Bing search endpoint (China mirror).
url = 'https://cn.bing.com/'
# Query-string parameters for the search request.
# 'q' is the search phrase ("library photocopy"); the remaining keys
# (qs/sc/cvid/FORM/sp) mirror what the Bing web UI sends.
payload = {
    'q': ' 图书馆 复印',
    'qs': 'HS',
    'sc': '8-0',
    'cvid': 'F62E476F1C59478FAFD948644425F6AA',
    'FORM': 'QBLH',
    'sp': 1
}
# Desktop-Chrome User-Agent so Bing serves the regular HTML results page
# instead of a bot/mobile variant.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}
# Fetch the first results page and collect one formatted entry per result:
# "title\nurl\nsnippet\n----\n".
ses = requests.Session()
rsp = ses.get(url, headers=headers, params=payload)
soup = BeautifulSoup(rsp.text, 'html.parser')
liArr = soup.find_all(class_='b_algo')  # each .b_algo is one organic result
lista = []
for lia in liArr:
    title = lia.h2.get_text()
    urlt = lia.h2.a['href']
    # Some result cards have no snippet <p>; the original code crashed with
    # AttributeError here while the pagination loop below guarded the same
    # access — make this pass equally robust and record the error text.
    try:
        cnt = lia.div.p.get_text()
    except AttributeError as err:
        cnt = str(err)
    str1 = title+'\n'+urlt+'\n'+cnt+'\n'+'----\n'
    lista.append(str1)

# Switch the query parameters to the form Bing uses for paginated requests.
payload['sk'] = 'HS2'
payload['sp'] = 3
payload['FORM'] = 'PORE'


def _flush(entries, fname):
    """Write the accumulated result entries to *fname* as UTF-8 text."""
    with open(fname, 'w', encoding='utf-8') as fd:
        fd.write('\n'.join(entries))
    print('完成', fname)


for i in range(91, 501):
    time.sleep(8)  # throttle requests so Bing does not block the crawler
    payload['first'] = i*10+1  # 1-based index of the first result on page i
    rsp = ses.get(url, headers=headers, params=payload)
    # Every 10 pages, dump what has been gathered so far to "<i>-10.txt".
    # NOTE: this runs before the current page is parsed, so the file holds
    # results from the previous ten pages.
    if i % 10 == 0:
        _flush(lista, str(i) + '-10.txt')
        lista = []

    soup = BeautifulSoup(rsp.text, 'html.parser')
    liArr = soup.find_all(class_='b_algo')
    for lia in liArr:
        title = lia.h2.get_text()
        urlt = lia.h2.a['href']
        try:
            cnt = lia.div.p.get_text()
        except AttributeError as err:
            # Result card without a snippet <p>: keep the error text so the
            # entry layout (4 lines per result) stays intact.
            cnt = str(err)
        str1 = title+'\n'+urlt+'\n'+cnt+'\n'+'----\n'
        lista.append(str1)

# BUG FIX: the original loop only wrote to disk when i % 10 == 0, so every
# result collected after the last flush (pages 491-500) was silently lost.
if lista:
    _flush(lista, '500-rest.txt')

# print(len(lista), lista)
